/*
Set directory
Defines the project root and the folder globals that every later step
reads from or writes to.
*/

* Replace the placeholder with the project root folder before running.
global root ENTER DIRECTORY

* Raw child-level survey files, Round 1 (both cohorts share one folder).
global r1 $root\Data\Young Lives\R1\survey_data_r1\india_r1

* Raw child-level survey files, Round 2 (yc = younger cohort, oc = older cohort).
global r2yc $root\Data\Young Lives\R2\survey_data_r2\india_r2\Younger Child
global r2oc $root\Data\Young Lives\R2\survey_data_r2\india_r2\Older Child

* Raw child-level survey files, Round 3.
global r3yc $root\Data\Young Lives\R3\survey_data_r3\india_r3\youngerchild
global r3oc $root\Data\Young Lives\R3\survey_data_r3\india_r3\olderchild

* Raw cognitive test files, Round 4.
global r4yc $root\Data\Young Lives\R4\UKDA-7931-stata11\stata11\india_r4\yc_chcog_anon
global r4oc $root\Data\Young Lives\R4\UKDA-7931-stata11\stata11\india_r4\oc_chcog_anon

* Raw school survey files (2010-11).
global school $root\Data\Young Lives\School_2010-11\stata11

* Output folder for every dataset this script saves.
* NOTE(review): this path uses the folder name Dta while the raw-data paths
* use Data - confirm that the difference is intended.
global data $root\Dta\Young Lives

/*
STEP TWO
Wave 1
Stacks the two Round 1 child-level files (1-year-old and 8-year-old),
tags cohort and age, attaches a district name to each cluster id, prefixes
the test-score variables with r1_ and saves the Round 1 extract.
*/
* clear is specified, as in every later step, so that the load does not stop
* with an error when a dataset is already in memory.
use "$r1\inchildlevel1yrold.dta", clear

*Creating Cohort Level Identifier (0 = rows loaded from the 1-year-old file)
gen cohort = 0

*Age
gen age = 1

append using "$r1\inchildlevel8yrold.dta"

*Rows added by the append have cohort and age missing: they come from the
*8-year-old file
replace cohort = 1 if cohort == .
replace age = 8 if age == .

keep CHILDID DINT CDINT clustid commid REGION typesite seta setb setab ravens cohort age

rename *, lower

*Creating District Level Identifiers
*The spelling "Mababubnagar" is kept as is: the district id step at the end
*of this file matches on this exact string.
gen distname = ""

replace distname = "West Godavari" if inlist(clustid, 1, 2)
replace distname = "Srikakulam"    if inlist(clustid, 3, 4, 5, 6, 7)
replace distname = "Kadapa"        if inlist(clustid, 8, 9)
replace distname = "Anantapur"     if inlist(clustid, 10, 11, 12, 13)
replace distname = "Karimnagar"    if inlist(clustid, 14, 15)
replace distname = "Mababubnagar"  if inlist(clustid, 16, 17, 18, 19)
replace distname = "Hyderabad"     if clustid == 20

*Creating Wave Identifiers
gen round = 1

*Prefix the Round 1 test scores so they stay distinct after rounds are appended
foreach score of varlist seta setb setab ravens {
    ren `score' r1_`score'
}

save "$data\Round 1\round_1.dta", replace


/*
STEP THREE
Wave 2
Stacks the Round 2 younger-child (age 5) and older-child (age 12) files,
keeps identifiers, interview dates and test scores, and saves the Round 2
extract with score variables prefixed r2_.
*/
use "$r2yc\inchildlevel5yrold.dta", clear

*Cohort identifier and age for the rows loaded from the younger-child file
gen cohort = 0
gen age = 5

append using "$r2oc\inchildquest12yrold.dta"

*Rows added by the append have cohort and age missing: older-child file
replace cohort = 1 if cohort == .
replace age = 12 if age == .

keep CHILDID CLUSTID COMMID TYPESITE REGION CDINT DINT cohort age ///
    score_ppvt score_cog score_math ///
    rscorelang_ppvt rscorelang_cog rscorelang_math

rename *, lower

*Wave identifier
gen round = 2

*score_* become r2_raw_* and rscorelang_* become r2_rasch_*
rename (score_ppvt score_cog score_math rscorelang_cog rscorelang_ppvt rscorelang_math) ///
       (r2_raw_ppvt r2_raw_cog r2_raw_math r2_rasch_cog r2_rasch_ppvt r2_rasch_math)

save "$data\Round 2\round_2.dta", replace

/*
STEP FOUR
Wave 3
Stacks the Round 3 younger-cohort (age 8) and older-cohort (age 15)
child-level files, keeps the test scores, and saves the Round 3 extract
with the renamed score variables prefixed r3_.
*/

use "$r3yc\in_yc_childlevel.dta", clear

*Cohort identifier and age for the rows loaded from the younger-cohort file
gen cohort = 0
gen age = 8

*Disabled step, kept for reference:
*merge 1:1 CHILDID using "$data\Round 3\temp_in_yc_hhchild.dta"

append using "$r3oc\in_oc_childlevel.dta"

*Rows added by the append have cohort and age missing: older-cohort file
replace cohort = 1 if cohort == .
replace age = 15 if age == .

*Note: rppvt, rmath and rcloze = rppvt_co, rmath_co and rcloze_co
*(rmath, rppvt and rcloze are kept below but not renamed)

keep CHILDID CDINT cohort age ///
    math cloze ppvt ///
    math_co ppvt_co cloze_co ///
    rmath_co rppvt_co rcloze_co ///
    rmath rppvt rcloze

rename *, lower

*Wave identifier
gen round = 3

*Plain and _co scores become r3_raw_*; the r*_co scores become r3_rasch_*
rename (math ppvt cloze math_co ppvt_co cloze_co rmath_co rppvt_co rcloze_co) ///
       (r3_raw_math r3_raw_ppvt r3_raw_cloze r3_raw_math_co r3_raw_ppvt_co r3_raw_cloze_co r3_rasch_math r3_rasch_ppvt r3_rasch_cloze)

save "$data\Round 3\round_3.dta", replace

*Wave 4
*Stacks the Round 4 younger-cohort (age 12) and older-cohort (age 19)
*cognitive test files, builds the string child id and the test date, saves
*the extract, then appends the Round 3, 2 and 1 extracts to it.
use "$r4yc\in_r4_yccog_youngerchild.dta", clear

gen cohort = 0

gen age = 12

*force lets the append go ahead even when a variable is string in one file
*and numeric in the other
append using "$r4oc\in_r4_occog_olderchild.dta", force

*Rows added by the append have cohort and age missing: older-cohort file
replace cohort = 1 if cohort == .

replace age = 19 if age == .

rename *, lower

*Build the string child id: the prefix IN plus the numeric code, with one
*padding zero for codes below 100000. The cutoff is 100000 rather than 99999
*so that the five-digit code 99999 is padded like every other five-digit code.
gen childid = cond(childcode < 100000, "IN0", "IN") + string(childcode)

*Test date taken from the maths test date fields
gen cdint = mdy(mathmth, mathday, mathyear)

*tagging if ppvt and math dates are not the same
*NOTE(review): only the day variables are compared, not month or year, and
*only for the younger cohort - confirm this is the intended check.
gen mismatch_dates = 1 if mathday != ppvtday & cohort == 0

keep childid engl_raw ppvt_raw lang_raw maths_raw sibmaths_raw cohort age cdint mismatch_dates

ren engl_raw raw_english
ren ppvt_raw raw_ppvt
ren lang_raw raw_telugu
ren maths_raw raw_math
ren sibmaths_raw raw_sibmath

*Wave 4 is stored as round 5; round 4 is assigned to the school survey in
*STEP FIVE
gen round = 5

foreach score of varlist raw_ppvt raw_math raw_telugu raw_sibmath raw_english {
    ren `score' r5_`score'
}

*NOTE(review): saved directly under the data folder, unlike rounds 1 to 3
*which each have their own subfolder.
save "$data\round_5.dta", replace

*Stack the earlier rounds under the round 5 rows
append using "$data\Round 3\round_3.dta"

append using "$data\Round 2\round_2.dta"

append using "$data\Round 1\round_1.dta"

save "$data\allchild_survey_rounds.dta", replace

/*
STEP FIVE
School Survey
Maths test: scores each child on the 21 maths items and saves the total
as raw_math.
*/

use "$school\z_in_sch_tblchildmathtestdh.dta", clear
ren CHILDID-MTHRSP21, lower
keep childid dint mthitm*

*Every school-survey row is tagged round 4, cohort 0, age 10
gen round = 4
gen cohort = 0
gen age = 10

*Strip the leading zero from item names 01 to 09
forvalues k = 1/9 {
    ren mthitm0`k' mthitm`k'
}

*Code 2 becomes 0; the listed special codes become missing
recode mthitm* (2=0) (77 79 88 99 -77 -79 -88 -99 = .)

*Drop children with no non-missing maths item
egen ss_admin = rownonmiss(mthitm*)
drop if ss_admin == 0

*Remaining missing items count as 0
forvalues k = 1/21 {
    replace mthitm`k' = 0 if mthitm`k' == .
}

*Rename item k to math plus the k-th number in this list
*(for example item 4 becomes math5 and item 21 becomes math28)
local itemno 1 2 3 5 6 7 8 10 11 12 13 14 15 16 17 18 20 22 24 26 28
forvalues k = 1/21 {
    local target : word `k' of `itemno'
    ren mthitm`k' math`target'
}

*Total score; the range math1-math28 follows the variable order in the
*dataset, which the renames above do not change
egen raw_math = rowtotal(math1-math28)

keep childid dint round cohort raw_math age

save "$data\School Survey\ss_math_full.dta",replace

********************
*School survey, English test: scores each child on the English items and
*saves the total as raw_english.

use "$school\z_in_sch_tblchildenglishtestdh", clear

ren CHILDID- SNTCPX74, lower

drop engrsp* splerr* sntcpx* undwrd* teststr1 testend1 teststr2 testend2 sntcmp*

*The listed special codes become missing
mvdecode engitm* , mv(77 88 99 79)

*Keep only children with at least one non-missing English item
egen ss_admin = rownonmiss(engitm*)
keep if ss_admin != 0
drop ss_admin

*Every school-survey row is tagged round 4, cohort 0, age 10
gen round = 4
gen cohort = 0
gen age = 10

sort childid

*Strip the leading zero from item names 01 to 09
forvalues k = 1/9 {
    ren engitm0`k' engitm`k'
}

*Code 2 and remaining missing items both count as 0
recode engitm* (2=0)(.=0)

*Total score over the items from engitm1 to engitm29 in dataset order
egen raw_english = rowtotal(engitm1-engitm29)

keep childid dint round cohort raw_english age
save "$data\School Survey\ss_english_full.dta",replace

*******************************
*School survey, Telugu test: scores each child on the Telugu items and
*saves the total as raw_telugu.
use "$school\z_in_sch_tblchildtelugutestdh.dta", clear

ren CHILDID-SNTCPX84,lower
drop telrsp* undwrd* splerr* sntcpx* sntcmp* teststr1 testend1 testlang

*The listed special codes become missing
mvdecode telitm*, mv(77 88 99 79)

*Every school-survey row is tagged round 4, cohort 0, age 10
gen round = 4
gen cohort = 0
gen age = 10

*Drop children with no non-missing Telugu item
egen ss_admin=rownonmiss(telitm*)
drop if ss_admin==0
drop ss_admin

*The numeric group id that used to be created here was never referenced and
*was discarded by the keep below, so it is no longer generated.
sort childid

*Strip the leading zero from item names 01 to 09
forval i = 1/9 {
    ren telitm0`i' telitm`i'
}

*Code 2 and remaining missing items both count as 0
recode telitm* (2=0)(.=0)

*Total score over the items from telitm1 to telitm32 in dataset order
egen raw_telugu = rowtotal(telitm1-telitm32)

keep childid dint round cohort raw_telugu age

save "$data\School Survey\ss_telugu_full.dta",replace

*Combine the three school-survey score files, one row per child.
*Starts from the Telugu scores left in memory by the save just above;
*nogenerate suppresses the _merge variable.
merge 1:1 childid using "$data\School Survey\ss_english_full.dta", nogenerate
merge 1:1 childid using "$data\School Survey\ss_math_full.dta", nogenerate

*Prefix the scores with r4_ (the school survey is round 4)
rename (raw_telugu raw_english raw_math) (r4_raw_telugu r4_raw_english r4_raw_math)

*Use the same date variable name as the survey-round files
rename dint cdint

save "$data\School Survey\ss.dta",replace

/*
STEP SIX
Appending all waves and creating variables for analysis
Loads the stacked survey rounds, appends the school survey, and carries
location identifiers forward within each child from earlier rounds to
later ones.
*/

use "$data\allchild_survey_rounds.dta", clear

append using "$data\School Survey\ss.dta"

sort childid round

*Carry-forward: a missing value takes the value from the same child's
*previous row. The within-child order is stated explicitly as (round) so the
*fill does not depend on whatever sort order happens to be in effect.
*Numeric identifiers:
foreach var of varlist clustid region typesite {
    bysort childid (round): replace `var' = `var'[_n-1] if `var' == .
}

*String identifiers:
foreach var of varlist commid distname {
    bysort childid (round): replace `var' = `var'[_n-1] if `var' == ""
}

*A cluster id of 90 is also overwritten with the previous row's value
*(the meaning of code 90 is not documented in this file)
bysort childid (round): replace clustid = clustid[_n-1] if clustid == 90

*Generating normalized test scores from raw scores
*Each raw score is standardised separately within cohort 0 and cohort 1:
*(score - cohort mean) / cohort standard deviation.
*Not normalized here: r2_raw_cog r3_raw_cloze r1_ravens r1_seta r1_setab r1_setb

foreach score of varlist r2_raw_math r2_raw_ppvt r3_raw_math r3_raw_ppvt r4_raw_math r5_raw_ppvt r5_raw_math r4_raw_english r4_raw_telugu r5_raw_english r5_raw_telugu {
    gen n`score' = .
    forvalues c = 0/1 {
        sum `score' if cohort == `c'
        replace n`score' = (`score' - r(mean))/r(sd) if cohort == `c'
    }
}

*Collapse the per-round normalized scores into one variable per subject.
*With the miss option a row whose components are all missing stays missing
*instead of becoming 0.
egen n_raw_math = rowtotal(nr2_raw_math nr3_raw_math nr4_raw_math nr5_raw_math), miss
egen n_raw_ppvt = rowtotal(nr2_raw_ppvt nr3_raw_ppvt nr5_raw_ppvt), miss
egen n_raw_english = rowtotal(nr4_raw_english nr5_raw_english), miss
egen n_raw_telugu = rowtotal(nr4_raw_telugu nr5_raw_telugu), miss

keep childid cdint dint cohort round distname n_raw_math n_raw_ppvt n_raw_english n_raw_telugu mismatch_dates

*Date/Day
*Use cdint as the date, falling back to dint where cdint is missing
gen idate = cond(cdint == ., dint, cdint)

format idate %dM_d,_CY

*Calendar components of the date
gen year = year(idate)
gen month = month(idate)
gen day = day(idate)
*dayofweek contains 0 for Sunday, 1 for Monday, ..., 6 for Saturday
gen dayofweek = dow(idate)

*Creating district ids
*Districts are numbered 1 to 7 in the order listed; the names must match the
*distname strings exactly, so the spelling "Mababubnagar" is kept as is.
gen distid = .
local k = 0
foreach d in "West Godavari" "Srikakulam" "Kadapa" "Anantapur" "Karimnagar" "Mababubnagar" "Hyderabad" {
    local ++k
    replace distid = `k' if distname == "`d'"
}

sort childid cohort round

order childid cohort round  _all

save "$data\all_ts.dta",replace

